── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks plotly::filter(), stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Lookup table mapping each full region name to the short abbreviation
# shown on plot axes.
region_labels <- c(
  "East Asia & Pacific" = "EAP",
  "Europe & Central Asia" = "ECA",
  "High income: OECD" = "HIC",
  "Latin America & Caribbean" = "LAC",
  "Middle East & North Africa" = "MENA",
  "South Asia" = "SA",
  "Sub-Saharan Africa" = "SSA"
)

# Per-region mean, median and standard deviation of GDP per person employed.
# Missing values are dropped before each statistic is computed.
gdp_summary <- df %>%
  group_by(Region) %>%
  summarise(
    Mean_GDP_Score = mean(gdp_per_person_employed, na.rm = TRUE),
    Median_GDP_Score = median(gdp_per_person_employed, na.rm = TRUE),
    SD_GDP_Score = sd(gdp_per_person_employed, na.rm = TRUE)
  )

# Print the summary table.
gdp_summary
# A tibble: 7 × 4
Region Mean_GDP_Score Median_GDP_Score SD_GDP_Score
<chr> <dbl> <dbl> <dbl>
1 East Asia & Pacific 44012. 33029. 49472.
2 Europe & Central Asia 49127. 48457. 20743.
3 High income: OECD 114705. 109444. 41572.
4 Latin America & Caribbean 43000. 39516. 23466.
5 Middle East & North Africa 66099. 58770. 33609.
6 South Asia 25070. 20069. 11615.
7 Sub-Saharan Africa 18903. 11898. 18469.
# A tibble: 7 × 4
Region Mean_Mobility_Score Median_Mobility_Score SD_Mobility_Score
<chr> <dbl> <dbl> <dbl>
1 East Asia & Pacif… 89 100 16.3
2 Europe & Central … 98.9 100 5.21
3 High income: OECD 100 100 0
4 Latin America & C… 90.6 100 13.8
5 Middle East & Nor… 46.2 37.5 39.1
6 South Asia 90.6 100 18.6
7 Sub-Saharan Africa 81.8 87.5 22.3
## Distribution of GDP per person employed
# Stacked histogram with one fill colour per region and 50,000-wide bins.
ggplot(df, aes(x = gdp_per_person_employed, fill = Region)) +
  geom_histogram(binwidth = 50000) +
  # Axis ticks follow the bin width; breaks outside the axis range are
  # dropped by ggplot2, so the generous upper bound is harmless.
  scale_x_continuous(breaks = seq(0, 10000000, by = 50000)) +
  labs(
    title = "Distribution of GDP by Region",
    x = "GDP",
    y = "Frequency"
  ) +
  # Applied once: a second theme_minimal() adds nothing.
  theme_minimal()
## Distribution of mobility scores
# Stacked histogram with one fill colour per region and 20-point bins.
mobility_p <- ggplot(df, aes(x = MOBILITY, fill = Region)) +
  geom_histogram(binwidth = 20) +
  # Axis ticks every 20 points to match the bin width.
  scale_x_continuous(breaks = seq(0, 100, by = 20)) +
  labs(
    title = "Distribution of Mobility Scores",
    x = "Mobility Score",
    y = "Frequency"
  ) +
  # Applied once: a second theme_minimal() adds nothing.
  theme_minimal()

# Render the static plot as an interactive plotly widget.
ggplotly(mobility_p)
# Box plot of mobility score per region, one box (and fill colour) per region.
ggplot(df, aes(x = Region, y = MOBILITY, fill = Region)) +
  geom_boxplot() +
  labs(
    title = "Mobility Scores by Region",
    x = "Region",
    y = "Mobility Score"
  ) +
  # Replace the long region names on the axis with their abbreviations.
  scale_x_discrete(labels = region_labels) +
  # Tilt the tick labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# A tibble: 45 × 5
Region Indicator Response Count Proportion
<chr> <chr> <chr> <int> <dbl>
1 East Asia & Pacific Can.a.woman.apply.for.a.pass… No 6 0.00789
2 East Asia & Pacific Can.a.woman.apply.for.a.pass… Yes 19 0.025
3 East Asia & Pacific Can.a.woman.choose.where.to.… No 3 0.00395
4 East Asia & Pacific Can.a.woman.choose.where.to.… Yes 22 0.0289
5 East Asia & Pacific Can.a.woman.travel.outside.h… No 2 0.00263
6 East Asia & Pacific Can.a.woman.travel.outside.h… Yes 23 0.0303
7 East Asia & Pacific Can.a.woman.travel.outside.t… Yes 25 0.0329
8 Europe & Central Asia Can.a.woman.apply.for.a.pass… No 1 0.00132
9 Europe & Central Asia Can.a.woman.apply.for.a.pass… Yes 22 0.0289
10 Europe & Central Asia Can.a.woman.choose.where.to.… Yes 23 0.0303
# ℹ 35 more rows
# Lookup table mapping each mobility indicator column name to a short,
# readable facet title.
mobility_labels <- c(
  "Can.a.woman.choose.where.to.live.in.the.same.way.as.a.man." =
    "Choice of Living Situation",
  "Can.a.woman.travel.outside.her.home.in.the.same.way.as.a.man." =
    "Travel Outside Home",
  "Can.a.woman.apply.for.a.passport.in.the.same.way.as.a.man." =
    "Passport Acquisition",
  # The indicator is about leaving the country, i.e. international (not
  # intercontinental) travel.
  "Can.a.woman.travel.outside.the.country.in.the.same.way.as.a.man." =
    "International Travel"
)

# Yes/No share per region, one facet per mobility indicator.
ggplot(mobility_proportions, aes(x = Region, y = Proportion, fill = Response)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  # position = "fill" rescales every bar to a total height of 1, so the bars
  # show the Yes/No split within each region.
  geom_col(position = "fill") +
  facet_wrap(
    ~Indicator,
    scales = "free",
    labeller = labeller(Indicator = mobility_labels)
  ) +
  # Replace the long region names on the axis with their abbreviations.
  scale_x_discrete(labels = region_labels) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Proportions of Yes/No Responses for Mobility Indicators By Region",
    x = "Region",
    y = "Proportion"
  )
# Count of responses to "choose where to live", with one bar per region
# placed side by side within each response.
mob_plt_1 <- ggplot(
  df,
  aes(
    x = Can.a.woman.choose.where.to.live.in.the.same.way.as.a.man.,
    fill = Region
  )
) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Can a woman choose where to live in the same way as a man by Region",
    x = "Can a woman choose where to live in the same way as a man",
    y = "Count"
  )

# Render the static plot as an interactive plotly widget.
ggplotly(mob_plt_1)
# Count of responses to "travel outside her home", with one bar per region
# placed side by side within each response.
mob_plt_2 <- ggplot(
  df,
  aes(
    x = Can.a.woman.travel.outside.her.home.in.the.same.way.as.a.man.,
    fill = Region
  )
) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Can a woman travel outside her home in the same way as a man",
    x = "Can a woman travel outside her home in the same way as a man",
    y = "Count"
  )

# Render the static plot as an interactive plotly widget.
ggplotly(mob_plt_2)
# Count of responses to "apply for a passport", with one bar per region
# placed side by side within each response.
mob_plt_3 <- ggplot(
  df,
  aes(
    x = Can.a.woman.apply.for.a.passport.in.the.same.way.as.a.man.,
    fill = Region
  )
) +
  geom_bar(position = "dodge") +
  labs(
    title = "Can a woman apply for a passport in the same way as a man",
    # Axis label typo fixed: "same was" -> "same way".
    x = "Can a woman apply for a passport in the same way as a man",
    y = "Count"
  ) +
  theme_minimal()

# Render the static plot as an interactive plotly widget.
ggplotly(mob_plt_3)
# Count of responses to "travel outside the country", with one bar per region
# placed side by side within each response.
mob_plt_4 <- ggplot(
  df,
  aes(
    x = Can.a.woman.travel.outside.the.country.in.the.same.way.as.a.man.,
    fill = Region
  )
) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  labs(
    title = "Can a woman travel outside the country in the same way as a man?",
    x = "Can a woman travel outside the country in the same way as a man?",
    y = "Count"
  )

# Render the static plot as an interactive plotly widget.
ggplotly(mob_plt_4)
# Welch two-sample t-test: does the mean mobility score differ between the
# "No" and "Yes" groups of the "choose where to live" indicator?
t_test_result <- t.test(
  MOBILITY ~ Can.a.woman.choose.where.to.live.in.the.same.way.as.a.man.,
  data = df
)
print(t_test_result)
Welch Two Sample t-test
data: MOBILITY by Can.a.woman.choose.where.to.live.in.the.same.way.as.a.man.
t = -11.11, df = 38.442, p-value = 1.449e-13
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-58.07865 -40.18138
sample estimates:
mean in group No mean in group Yes
46.62162 95.75163
# Welch two-sample t-test: does the mean mobility score differ between the
# "No" and "Yes" groups of the "travel outside her home" indicator?
t_test_result <- t.test(
  MOBILITY ~ Can.a.woman.travel.outside.her.home.in.the.same.way.as.a.man.,
  data = df
)
print(t_test_result)
Welch Two Sample t-test
data: MOBILITY by Can.a.woman.travel.outside.her.home.in.the.same.way.as.a.man.
t = -12.536, df = 17.37, p-value = 3.921e-10
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-78.48727 -55.90579
sample estimates:
mean in group No mean in group Yes
25.00000 92.19653
# Welch two-sample t-test: does the mean mobility score differ between the
# "No" and "Yes" groups of the "apply for a passport" indicator?
t_test_result <- t.test(
  MOBILITY ~ Can.a.woman.apply.for.a.passport.in.the.same.way.as.a.man.,
  data = df
)
print(t_test_result)
Welch Two Sample t-test
data: MOBILITY by Can.a.woman.apply.for.a.passport.in.the.same.way.as.a.man.
t = -8.5657, df = 48.039, p-value = 3.098e-11
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-46.41090 -28.76507
sample estimates:
mean in group No mean in group Yes
56.70732 94.29530
# Welch two-sample t-test: does the mean mobility score differ between the
# "No" and "Yes" groups of the "travel outside the country" indicator?
t_test_result <- t.test(
  MOBILITY ~ Can.a.woman.travel.outside.the.country.in.the.same.way.as.a.man.,
  data = df
)
print(t_test_result)
Welch Two Sample t-test
data: MOBILITY by Can.a.woman.travel.outside.the.country.in.the.same.way.as.a.man.
t = -18.855, df = 10.716, p-value = 1.46e-09
alternative hypothesis: true difference in means between group No and group Yes is not equal to 0
95 percent confidence interval:
-89.83431 -70.99902
sample estimates:
mean in group No mean in group Yes
10.00000 90.41667
# Scatter plot of mobility score vs GDP per person employed, with a fitted
# linear regression line (and its confidence band) drawn in red.
reg_mob <- ggplot(df, aes(x = MOBILITY, y = gdp_per_person_employed)) +
  geom_point() +
  # The formula is spelled out so geom_smooth() does not emit its
  # "using formula = 'y ~ x'" message; the fit itself is unchanged.
  geom_smooth(method = "lm", formula = y ~ x, color = "red") +
  theme_minimal() +
  labs(
    title = "Mobility Score vs GDP per Person Employed",
    x = "Mobility Score",
    y = "GDP per Person Employed"
  )

# Render the static plot as an interactive plotly widget.
ggplotly(reg_mob)
`geom_smooth()` using formula = 'y ~ x'
# Density plot of the mobility score with the mean (red) and median (blue)
# marked by dashed vertical lines and rotated text labels.

# Compute the reference values once; they are reused for the lines and labels.
mobility_mean <- mean(df$MOBILITY, na.rm = TRUE)
mobility_median <- median(df$MOBILITY, na.rm = TRUE)

ggplot(df, aes(x = MOBILITY)) +
  geom_density(fill = "skyblue", alpha = 0.5, color = "black") +
  # xintercept is passed as a constant rather than inside aes(): mapping it
  # through aes() would draw one identical line for every row of df.
  geom_vline(
    xintercept = mobility_mean,
    color = "red",
    linetype = "dashed",
    linewidth = 1
  ) +
  geom_vline(
    xintercept = mobility_median,
    color = "blue",
    linetype = "dashed",
    linewidth = 1
  ) +
  theme_minimal() +
  labs(title = "Density of Mobility Score", x = "Mobility Score", y = "Density") +
  annotate(
    "text",
    x = mobility_mean,
    y = 0.015,
    label = "Mean",
    color = "red",
    angle = 90,
    vjust = 1.5
  ) +
  annotate(
    "text",
    x = mobility_median,
    y = 0.012,
    label = "Median",
    color = "blue",
    angle = 90,
    vjust = 1.5
  )